Use
rmarkdown::render("MSc_AS-SMM634-Group4-Project.rmd", output_dir = "docs")
to render…
rm(list = ls())
# CarDataRead.R
# source(file.path(".", "CarDataRead.R"))
df <- read.csv("car_price.csv") |>
mutate(fsymboling = as.factor(symboling)) |>
mutate(safety = case_match(
symboling,
c(-2, -1) ~ "<-1",
0 ~ "0",
1 ~ "1",
2 ~ "2",
3 ~ "3"
)) |>
mutate(safetyIncr = case_match(
symboling,
c(-2, -1) ~ "4",
0 ~ "3",
1 ~ "2",
2 ~ "1",
3 ~ "0"
)) |>
mutate(safetyIncr2 = case_match(
symboling,
0 ~ "Base",
c(-2, -1) ~ "Safer",
c(1, 2, 3) ~ "Riskier"
)) |>
mutate(cylinderNum = case_match(cylindernumber,
c("two", "three") ~ "leq_three",
c("eight", "twelve") ~ "geq_eight",
.default = cylindernumber
))
table(df$cylinderNum)##
## five four geq_eight leq_three six
## 11 159 6 5 24
Want to check for any typos, missing data, or NaN values.
Extracting car manufacturer from first part of CarName
and correcting spelling errors.
# extract car manufacturer from first part of CarName
df$carManufacturer <- sapply(strsplit(df$CarName, " +"), `[`, 1)
# print
table(df$carManufacturer)##
## alfa-romero audi bmw buick chevrolet dodge honda
## 3 7 8 8 3 9 13
## isuzu jaguar maxda mazda mercury mitsubishi nissan
## 4 3 2 15 1 13 17
## Nissan peugeot plymouth porcshce porsche renault saab
## 1 11 7 1 4 2 6
## subaru toyota toyouta vokswagen volkswagen volvo vw
## 12 31 1 1 9 11 2
Our assumptions are as follows:
| Typo / Mis-spelling | Correct Spelling |
|---|---|
| maxda | mazda |
| Nissan | nissan |
| porcshce | porsche |
| toyouta | toyota |
| vokswagen | volkswagen |
| vw | volkswagen |
df <- df %>% mutate(carManufacturer = case_match(carManufacturer,
"maxda" ~ "mazda",
"Nissan" ~ "nissan",
"porcshce" ~ "porsche",
"toyouta" ~ "toyota",
c("vokswagen", "vw") ~ "volkswagen",
.default = carManufacturer
))
table(df$carManufacturer)##
## alfa-romero audi bmw buick chevrolet dodge honda
## 3 7 8 8 3 9 13
## isuzu jaguar mazda mercury mitsubishi nissan peugeot
## 4 3 17 1 13 18 11
## plymouth porsche renault saab subaru toyota volkswagen
## 7 5 2 6 12 32 12
## volvo
## 11
## car_ID symboling CarName fueltype aspiration
## 0 0 0 0 0
## doornumber carbody drivewheel enginelocation wheelbase
## 0 0 0 0 0
## carlength carwidth carheight curbweight enginetype
## 0 0 0 0 0
## cylindernumber enginesize fuelsystem boreratio stroke
## 0 0 0 0 0
## compressionratio horsepower peakrpm citympg highwaympg
## 0 0 0 0 0
## price fsymboling safety safetyIncr safetyIncr2
## 0 0 0 0 0
## cylinderNum carManufacturer
## 0 0
## Total number of NaN values: 0
Looks good - no NaN values.
## car_ID symboling CarName fueltype
## Min. : 1 Min. :-2.0000 Length:205 Length:205
## 1st Qu.: 52 1st Qu.: 0.0000 Class :character Class :character
## Median :103 Median : 1.0000 Mode :character Mode :character
## Mean :103 Mean : 0.8341
## 3rd Qu.:154 3rd Qu.: 2.0000
## Max. :205 Max. : 3.0000
## aspiration doornumber carbody drivewheel
## Length:205 Length:205 Length:205 Length:205
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## enginelocation wheelbase carlength carwidth carheight
## Length:205 Min. : 86.60 Min. :141.1 Min. :60.30 Min. :47.80
## Class :character 1st Qu.: 94.50 1st Qu.:166.3 1st Qu.:64.10 1st Qu.:52.00
## Mode :character Median : 97.00 Median :173.2 Median :65.50 Median :54.10
## Mean : 98.76 Mean :174.0 Mean :65.91 Mean :53.72
## 3rd Qu.:102.40 3rd Qu.:183.1 3rd Qu.:66.90 3rd Qu.:55.50
## Max. :120.90 Max. :208.1 Max. :72.30 Max. :59.80
## curbweight enginetype cylindernumber enginesize
## Min. :1488 Length:205 Length:205 Min. : 61.0
## 1st Qu.:2145 Class :character Class :character 1st Qu.: 97.0
## Median :2414 Mode :character Mode :character Median :120.0
## Mean :2556 Mean :126.9
## 3rd Qu.:2935 3rd Qu.:141.0
## Max. :4066 Max. :326.0
## fuelsystem boreratio stroke compressionratio horsepower
## Length:205 Min. :2.54 Min. :2.070 Min. : 7.00 Min. : 48.0
## Class :character 1st Qu.:3.15 1st Qu.:3.110 1st Qu.: 8.60 1st Qu.: 70.0
## Mode :character Median :3.31 Median :3.290 Median : 9.00 Median : 95.0
## Mean :3.33 Mean :3.255 Mean :10.14 Mean :104.1
## 3rd Qu.:3.58 3rd Qu.:3.410 3rd Qu.: 9.40 3rd Qu.:116.0
## Max. :3.94 Max. :4.170 Max. :23.00 Max. :288.0
## peakrpm citympg highwaympg price fsymboling
## Min. :4150 Min. :13.00 Min. :16.00 Min. : 5118 -2: 3
## 1st Qu.:4800 1st Qu.:19.00 1st Qu.:25.00 1st Qu.: 7788 -1:22
## Median :5200 Median :24.00 Median :30.00 Median :10295 0 :67
## Mean :5125 Mean :25.22 Mean :30.75 Mean :13277 1 :54
## 3rd Qu.:5500 3rd Qu.:30.00 3rd Qu.:34.00 3rd Qu.:16503 2 :32
## Max. :6600 Max. :49.00 Max. :54.00 Max. :45400 3 :27
## safety safetyIncr safetyIncr2 cylinderNum
## Length:205 Length:205 Length:205 Length:205
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## carManufacturer
## Length:205
## Class :character
## Mode :character
##
##
##
## 'data.frame': 205 obs. of 32 variables:
## $ car_ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ symboling : int 3 3 1 2 2 2 1 1 1 0 ...
## $ CarName : chr "alfa-romero giulia" "alfa-romero stelvio" "alfa-romero Quadrifoglio" "audi 100 ls" ...
## $ fueltype : chr "gas" "gas" "gas" "gas" ...
## $ aspiration : chr "std" "std" "std" "std" ...
## $ doornumber : chr "two" "two" "two" "four" ...
## $ carbody : chr "convertible" "convertible" "hatchback" "sedan" ...
## $ drivewheel : chr "rwd" "rwd" "rwd" "fwd" ...
## $ enginelocation : chr "front" "front" "front" "front" ...
## $ wheelbase : num 88.6 88.6 94.5 99.8 99.4 ...
## $ carlength : num 169 169 171 177 177 ...
## $ carwidth : num 64.1 64.1 65.5 66.2 66.4 66.3 71.4 71.4 71.4 67.9 ...
## $ carheight : num 48.8 48.8 52.4 54.3 54.3 53.1 55.7 55.7 55.9 52 ...
## $ curbweight : int 2548 2548 2823 2337 2824 2507 2844 2954 3086 3053 ...
## $ enginetype : chr "dohc" "dohc" "ohcv" "ohc" ...
## $ cylindernumber : chr "four" "four" "six" "four" ...
## $ enginesize : int 130 130 152 109 136 136 136 136 131 131 ...
## $ fuelsystem : chr "mpfi" "mpfi" "mpfi" "mpfi" ...
## $ boreratio : num 3.47 3.47 2.68 3.19 3.19 3.19 3.19 3.19 3.13 3.13 ...
## $ stroke : num 2.68 2.68 3.47 3.4 3.4 3.4 3.4 3.4 3.4 3.4 ...
## $ compressionratio: num 9 9 9 10 8 8.5 8.5 8.5 8.3 7 ...
## $ horsepower : int 111 111 154 102 115 110 110 110 140 160 ...
## $ peakrpm : int 5000 5000 5000 5500 5500 5500 5500 5500 5500 5500 ...
## $ citympg : int 21 21 19 24 18 19 19 19 17 16 ...
## $ highwaympg : int 27 27 26 30 22 25 25 25 20 22 ...
## $ price : num 13495 16500 16500 13950 17450 ...
## $ fsymboling : Factor w/ 6 levels "-2","-1","0",..: 6 6 4 5 5 5 4 4 4 3 ...
## $ safety : chr "3" "3" "1" "2" ...
## $ safetyIncr : chr "0" "0" "2" "1" ...
## $ safetyIncr2 : chr "Riskier" "Riskier" "Riskier" "Riskier" ...
## $ cylinderNum : chr "four" "four" "six" "four" ...
## $ carManufacturer : chr "alfa-romero" "alfa-romero" "alfa-romero" "audi" ...
##
## -2 -1 0 1 2 3
## 3 22 67 54 32 27
##
## <-1 0 1 2 3
## 25 67 54 32 27
##
## 0 1 2 3 4
## 27 32 54 67 25
##
## eight five four six three twelve two
## 5 11 159 24 1 1 4
##
## five four geq_eight leq_three six
## 11 159 6 5 24
# factor_cols <- c("symboling", "safety", "safetyIncr", "cylindernumber", "cylinderNum","carbody")
# examine categorical fields
factor_cols<- c(
"symboling",
"safety",
"safetyIncr",
"safetyIncr2",
"cylindernumber",
"cylinderNum",
"carbody",
"fueltype",
"aspiration",
"doornumber",
"carbody",
"drivewheel",
"enginelocation", # BE note: 202:3 split so don't include in model
"enginetype",
"cylindernumber",
"fuelsystem",
"carManufacturer"
)
cols_use <- unique(intersect(factor_cols, names(df)))
fc_missing <- setdiff(unique(factor_cols), names(df))
if (length(fc_missing)) {
message(
"Missing in df (skipped): ",
paste(fc_missing, collapse = ", ")
)
}
tab_list <- setNames(
lapply(cols_use, function(v) table(df[[v]], useNA = "ifany")),
# lapply(cols_use, function(v) prop.table(table(df[[v]], useNA = "ifany"))),
cols_use
)
for (nm in names(tab_list)) {
cat("\n==== ", nm, " ====\n", sep = "")
print(tab_list[[nm]])
}##
## ==== symboling ====
##
## -2 -1 0 1 2 3
## 3 22 67 54 32 27
##
## ==== safety ====
##
## <-1 0 1 2 3
## 25 67 54 32 27
##
## ==== safetyIncr ====
##
## 0 1 2 3 4
## 27 32 54 67 25
##
## ==== safetyIncr2 ====
##
## Base Riskier Safer
## 67 113 25
##
## ==== cylindernumber ====
##
## eight five four six three twelve two
## 5 11 159 24 1 1 4
##
## ==== cylinderNum ====
##
## five four geq_eight leq_three six
## 11 159 6 5 24
##
## ==== carbody ====
##
## convertible hardtop hatchback sedan wagon
## 6 8 70 96 25
##
## ==== fueltype ====
##
## diesel gas
## 20 185
##
## ==== aspiration ====
##
## std turbo
## 168 37
##
## ==== doornumber ====
##
## four two
## 115 90
##
## ==== drivewheel ====
##
## 4wd fwd rwd
## 9 120 76
##
## ==== enginelocation ====
##
## front rear
## 202 3
##
## ==== enginetype ====
##
## dohc dohcv l ohc ohcf ohcv rotor
## 12 1 12 148 15 13 4
##
## ==== fuelsystem ====
##
## 1bbl 2bbl 4bbl idi mfi mpfi spdi spfi
## 11 66 3 20 1 94 9 1
##
## ==== carManufacturer ====
##
## alfa-romero audi bmw buick chevrolet dodge honda
## 3 7 8 8 3 9 13
## isuzu jaguar mazda mercury mitsubishi nissan peugeot
## 4 3 17 1 13 18 11
## plymouth porsche renault saab subaru toyota volkswagen
## 7 5 2 6 12 32 12
## volvo
## 11
## 'data.frame': 205 obs. of 32 variables:
## $ car_ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ symboling : Factor w/ 6 levels "-2","-1","0",..: 6 6 4 5 5 5 4 4 4 3 ...
## $ CarName : chr "alfa-romero giulia" "alfa-romero stelvio" "alfa-romero Quadrifoglio" "audi 100 ls" ...
## $ fueltype : Factor w/ 2 levels "diesel","gas": 2 2 2 2 2 2 2 2 2 2 ...
## $ aspiration : Factor w/ 2 levels "std","turbo": 1 1 1 1 1 1 1 1 2 2 ...
## $ doornumber : Factor w/ 2 levels "four","two": 2 2 2 1 1 2 1 1 1 2 ...
## $ carbody : Factor w/ 5 levels "convertible",..: 1 1 3 4 4 4 4 5 4 3 ...
## $ drivewheel : Factor w/ 3 levels "4wd","fwd","rwd": 3 3 3 2 1 2 2 2 2 1 ...
## $ enginelocation : Factor w/ 2 levels "front","rear": 1 1 1 1 1 1 1 1 1 1 ...
## $ wheelbase : num 88.6 88.6 94.5 99.8 99.4 ...
## $ carlength : num 169 169 171 177 177 ...
## $ carwidth : num 64.1 64.1 65.5 66.2 66.4 66.3 71.4 71.4 71.4 67.9 ...
## $ carheight : num 48.8 48.8 52.4 54.3 54.3 53.1 55.7 55.7 55.9 52 ...
## $ curbweight : int 2548 2548 2823 2337 2824 2507 2844 2954 3086 3053 ...
## $ enginetype : Factor w/ 7 levels "dohc","dohcv",..: 1 1 6 4 4 4 4 4 4 4 ...
## $ cylindernumber : Factor w/ 7 levels "eight","five",..: 3 3 4 3 2 2 2 2 2 2 ...
## $ enginesize : int 130 130 152 109 136 136 136 136 131 131 ...
## $ fuelsystem : Factor w/ 8 levels "1bbl","2bbl",..: 6 6 6 6 6 6 6 6 6 6 ...
## $ boreratio : num 3.47 3.47 2.68 3.19 3.19 3.19 3.19 3.19 3.13 3.13 ...
## $ stroke : num 2.68 2.68 3.47 3.4 3.4 3.4 3.4 3.4 3.4 3.4 ...
## $ compressionratio: num 9 9 9 10 8 8.5 8.5 8.5 8.3 7 ...
## $ horsepower : int 111 111 154 102 115 110 110 110 140 160 ...
## $ peakrpm : int 5000 5000 5000 5500 5500 5500 5500 5500 5500 5500 ...
## $ citympg : int 21 21 19 24 18 19 19 19 17 16 ...
## $ highwaympg : int 27 27 26 30 22 25 25 25 20 22 ...
## $ price : num 13495 16500 16500 13950 17450 ...
## $ fsymboling : Factor w/ 6 levels "-2","-1","0",..: 6 6 4 5 5 5 4 4 4 3 ...
## $ safety : Factor w/ 5 levels "<-1","0","1",..: 5 5 3 4 4 4 3 3 3 2 ...
## $ safetyIncr : Factor w/ 5 levels "0","1","2","3",..: 1 1 3 2 2 2 3 3 3 4 ...
## $ safetyIncr2 : Factor w/ 3 levels "Base","Riskier",..: 2 2 2 2 2 2 2 2 2 1 ...
## $ cylinderNum : Factor w/ 5 levels "five","four",..: 2 2 5 2 1 1 1 1 1 1 ...
## $ carManufacturer : Factor w/ 22 levels "alfa-romero",..: 1 1 1 2 2 2 2 2 2 2 ...
Have chosen price as response variable. Want to see how
it’s distributed… (might want to use log(price) as used in Intro to
Python).
ggplot(df, aes(x = price)) +
geom_histogram(bins = 30, fill = "blue", alpha = 0.7) +
ggtitle("Distribution of Car Prices")Look at numerical variables vs price
# price vs horsepower
ggplot(df, aes(x = horsepower, y = price)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", color = "red", se = FALSE) +
ggtitle("Price vs Horsepower") +
theme_minimal()# Price vs engine size
ggplot(df, aes(x = enginesize, y = price)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", color = "red", se = FALSE) +
ggtitle("Price vs Engine Size") +
theme_minimal()# Price vs highwaympg (could also use citympg)
ggplot(df, aes(x = highwaympg, y = price)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", color = "red", se = FALSE) +
ggtitle("Price vs MPG (Highway)") +
theme_minimal()# Price vs citympg (note: city vs highwaympg are likely to be correlated)
ggplot(df, aes(x = citympg, y = price)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", color = "red", se = FALSE) +
ggtitle("Price vs MPG (City)") +
theme_minimal()This is only a quick look, but clear issues with negative price assuming simple linear regression > ~40 mpg for both city and highway mpg
# Price vs fuel type
ggplot(df, aes(x = fueltype, y = price)) +
geom_boxplot() +
ggtitle("Price
Distribution by Fuel Type") +
theme_minimal()# Price vs car body
ggplot(df, aes(x = carbody, y = price)) +
geom_boxplot() +
ggtitle("Price
Distribution by Car Body") +
theme_minimal()# Price vs (new) manufacturer
ggplot(df, aes(x = carManufacturer, y = price)) +
geom_boxplot() +
ggtitle("Price Distribution by Manufacturer") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))This is not unexpected, but is quite interesting to see. Some manufacturers are more expensive (& the range of models produced is also variable).
Based upon our personal investigation & search we think the data we have been given here are a subset of the data from 1985 Ward’s Automotive Yearbook (with a more comprehensive data set being available from https://archive.ics.uci.edu/dataset/10/automobile).
Note, this doesn’t consider companies that own other companies (i.e., Toyota currently owns Lexus, but both could be considered to be toyota despite both having different price points. This consideration is something we decided was beyond the scope of this analysis).
Our main goal is to get an accurate predicted price \[\hat{y}\] for a new car.
BE note: A good pricing assistant chimes with goals of assessment and the lecture content.
Excluding predictors that are highly correlated (parameter choice)
# Y = price
# X = horsepower, enginesize, curbweight, highwaympg,
# fueltype, aspiration, carbody, drivewheel, cylinderNum
mdl.pa1 <- lm(
price ~ horsepower + enginesize + curbweight + highwaympg +
fueltype + aspiration + carbody + drivewheel + cylinderNum,
data = df
)
print(summary(mdl.pa1))##
## Call:
## lm(formula = price ~ horsepower + enginesize + curbweight + highwaympg +
## fueltype + aspiration + carbody + drivewheel + cylinderNum,
## data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8084.9 -1197.2 -81.7 1316.3 14226.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -765.673 5565.870 -0.138 0.89073
## horsepower 44.333 13.657 3.246 0.00138 **
## enginesize 56.908 18.424 3.089 0.00231 **
## curbweight 3.238 1.518 2.133 0.03420 *
## highwaympg 46.589 74.310 0.627 0.53145
## fueltypegas -629.559 1227.468 -0.513 0.60863
## aspirationturbo 400.001 825.850 0.484 0.62870
## carbodyhardtop -2067.166 1699.423 -1.216 0.22536
## carbodyhatchback -4742.402 1360.316 -3.486 0.00061 ***
## carbodysedan -3091.102 1336.340 -2.313 0.02180 *
## carbodywagon -4489.628 1498.218 -2.997 0.00310 **
## drivewheelfwd 26.116 1141.418 0.023 0.98177
## drivewheelrwd 1711.372 1229.935 1.391 0.16574
## cylinderNumfour -4860.555 1113.372 -4.366 2.09e-05 ***
## cylinderNumgeq_eight 2254.206 2250.742 1.002 0.31785
## cylinderNumleq_three -655.617 2039.925 -0.321 0.74827
## cylinderNumsix -2068.122 1369.065 -1.511 0.13257
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3053 on 188 degrees of freedom
## Multiple R-squared: 0.8654, Adjusted R-squared: 0.8539
## F-statistic: 75.54 on 16 and 188 DF, p-value: < 2.2e-16
F-statistic: 75.54 on 16 and 188 DF, p-value: < 2.2e-16
Adjusted R-squared: 0.8539
BE comments: Model accounts for ~85% of variability in the variables (Adj. R-squared = 85.4%, F-statistic p-value < 2.2e-16).
Quite a few predictors have high p-values (not statistically significant)
Next step to use Backward Selection process to run algorithm removing the variable with the largest p-value and re-fitting the model…
## Start: AIC=3306.08
## price ~ horsepower + enginesize + curbweight + highwaympg + fueltype +
## aspiration + carbody + drivewheel + cylinderNum
##
## Df Sum of Sq RSS AIC
## - aspiration 1 2186978 1754787582 3304.3
## - fueltype 1 2452323 1755052928 3304.4
## - highwaympg 1 3664316 1756264920 3304.5
## <none> 1752600604 3306.1
## - drivewheel 2 53740715 1806341319 3308.3
## - curbweight 1 42424115 1795024719 3309.0
## - enginesize 1 88938316 1841538921 3314.2
## - horsepower 1 98234746 1850835351 3315.3
## - carbody 4 193349724 1945950328 3319.5
## - cylinderNum 4 327945047 2080545651 3333.2
##
## Step: AIC=3304.33
## price ~ horsepower + enginesize + curbweight + highwaympg + fueltype +
## carbody + drivewheel + cylinderNum
##
## Df Sum of Sq RSS AIC
## - highwaympg 1 2819841 1757607423 3302.7
## - fueltype 1 7240661 1762028243 3303.2
## <none> 1754787582 3304.3
## - drivewheel 2 51554337 1806341919 3306.3
## - curbweight 1 44899211 1799686793 3307.5
## - enginesize 1 87087320 1841874902 3312.3
## - carbody 4 191244297 1946031880 3317.5
## - horsepower 1 141840989 1896628571 3318.3
## - cylinderNum 4 340069822 2094857404 3332.6
##
## Step: AIC=3302.66
## price ~ horsepower + enginesize + curbweight + fueltype + carbody +
## drivewheel + cylinderNum
##
## Df Sum of Sq RSS AIC
## <none> 1757607423 3302.7
## - fueltype 1 18328613 1775936037 3302.8
## - drivewheel 2 51993718 1809601142 3304.6
## - curbweight 1 47386509 1804993933 3306.1
## - enginesize 1 90937348 1848544771 3311.0
## - carbody 4 188713756 1946321179 3315.6
## - horsepower 1 140987873 1898595296 3316.5
## - cylinderNum 4 337658895 2095266319 3330.7
See if variables removed as a group were significant.
Hypothesis:
## Analysis of Variance Table
##
## Model 1: price ~ horsepower + enginesize + curbweight + fueltype + carbody +
## drivewheel + cylinderNum
## Model 2: price ~ horsepower + enginesize + curbweight + highwaympg + fueltype +
## aspiration + carbody + drivewheel + cylinderNum
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 190 1757607423
## 2 188 1752600604 2 5006819 0.2685 0.7648
## GVIF Df GVIF^(1/(2*Df))
## horsepower 4.717089 1 2.171886
## enginesize 12.163868 1 3.487674
## curbweight 9.581354 1 3.095376
## fueltype 1.529787 1 1.236846
## carbody 1.980039 4 1.089141
## drivewheel 2.796677 2 1.293185
## cylinderNum 7.050859 4 1.276528
Brief BE summary:
Plots show two issues
To fix heteroscedasticity let’s use log(price) & look at what happens if we include carManufacturer?
mdl.pa3 <- lm(
log(price) ~ horsepower + enginesize + curbweight + highwaympg +
fueltype + aspiration + carbody + drivewheel + cylinderNum +
carManufacturer,
data = df
)
summary(mdl.pa3)##
## Call:
## lm(formula = log(price) ~ horsepower + enginesize + curbweight +
## highwaympg + fueltype + aspiration + carbody + drivewheel +
## cylinderNum + carManufacturer, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.38320 -0.06125 0.00149 0.07107 0.39165
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.921e+00 2.881e-01 27.498 < 2e-16 ***
## horsepower 1.845e-03 9.248e-04 1.995 0.047619 *
## enginesize 1.392e-03 9.460e-04 1.471 0.143096
## curbweight 4.859e-04 7.902e-05 6.149 5.56e-09 ***
## highwaympg -2.788e-03 3.509e-03 -0.795 0.427919
## fueltypegas -1.764e-02 6.014e-02 -0.293 0.769692
## aspirationturbo 7.776e-02 4.274e-02 1.819 0.070675 .
## carbodyhardtop -1.651e-01 7.622e-02 -2.166 0.031741 *
## carbodyhatchback -2.283e-01 6.453e-02 -3.539 0.000521 ***
## carbodysedan -1.645e-01 6.479e-02 -2.538 0.012051 *
## carbodywagon -2.453e-01 7.022e-02 -3.493 0.000611 ***
## drivewheelfwd 4.103e-02 5.471e-02 0.750 0.454396
## drivewheelrwd 7.213e-02 6.621e-02 1.089 0.277540
## cylinderNumfour 1.106e-01 8.426e-02 1.313 0.191139
## cylinderNumgeq_eight -3.306e-02 1.046e-01 -0.316 0.752281
## cylinderNumleq_three 3.267e-01 1.145e-01 2.854 0.004861 **
## cylinderNumsix 1.219e-01 8.638e-02 1.411 0.160179
## carManufactureraudi 2.936e-01 1.206e-01 2.435 0.015924 *
## carManufacturerbmw 3.465e-01 9.778e-02 3.543 0.000513 ***
## carManufacturerbuick 2.318e-01 1.365e-01 1.697 0.091480 .
## carManufacturerchevrolet -1.765e-01 1.241e-01 -1.422 0.156774
## carManufacturerdodge -1.828e-01 9.899e-02 -1.847 0.066510 .
## carManufacturerhonda -7.288e-02 9.518e-02 -0.766 0.444941
## carManufacturerisuzu -6.875e-02 1.073e-01 -0.641 0.522474
## carManufacturerjaguar -1.046e-01 1.345e-01 -0.778 0.437904
## carManufacturermazda -3.609e-02 9.384e-02 -0.385 0.701016
## carManufacturermercury -8.873e-02 1.551e-01 -0.572 0.568014
## carManufacturermitsubishi -2.249e-01 9.760e-02 -2.304 0.022457 *
## carManufacturernissan -1.415e-01 9.224e-02 -1.534 0.126826
## carManufacturerpeugeot -1.686e-01 1.065e-01 -1.583 0.115363
## carManufacturerplymouth -2.120e-01 1.004e-01 -2.112 0.036137 *
## carManufacturerporsche 4.521e-01 1.084e-01 4.170 4.88e-05 ***
## carManufacturerrenault -1.369e-01 1.282e-01 -1.068 0.286926
## carManufacturersaab 7.133e-02 1.082e-01 0.659 0.510726
## carManufacturersubaru -1.630e-01 9.795e-02 -1.664 0.098036 .
## carManufacturertoyota -1.511e-01 8.732e-02 -1.730 0.085431 .
## carManufacturervolkswagen -4.404e-02 9.448e-02 -0.466 0.641742
## carManufacturervolvo 9.085e-03 9.783e-02 0.093 0.926122
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1248 on 167 degrees of freedom
## Multiple R-squared: 0.9498, Adjusted R-squared: 0.9386
## F-statistic: 85.34 on 37 and 167 DF, p-value: < 2.2e-16
## Start: AIC=-819.25
## log(price) ~ horsepower + enginesize + curbweight + highwaympg +
## fueltype + aspiration + carbody + drivewheel + cylinderNum +
## carManufacturer
##
## Df Sum of Sq RSS AIC
## - drivewheel 2 0.01873 2.6198 -821.78
## - fueltype 1 0.00134 2.6024 -821.15
## - highwaympg 1 0.00984 2.6109 -820.48
## <none> 2.6011 -819.25
## - enginesize 1 0.03372 2.6348 -818.61
## - aspiration 1 0.05155 2.6526 -817.23
## - horsepower 1 0.06202 2.6631 -816.42
## - cylinderNum 4 0.15826 2.7593 -815.14
## - carbody 4 0.33738 2.9385 -802.25
## - curbweight 1 0.58891 3.1900 -779.41
## - carManufacturer 21 2.77342 5.3745 -712.48
##
## Step: AIC=-821.78
## log(price) ~ horsepower + enginesize + curbweight + highwaympg +
## fueltype + aspiration + carbody + cylinderNum + carManufacturer
##
## Df Sum of Sq RSS AIC
## - fueltype 1 0.00495 2.6248 -823.39
## - highwaympg 1 0.01111 2.6309 -822.91
## <none> 2.6198 -821.78
## - enginesize 1 0.03664 2.6565 -820.93
## - aspiration 1 0.03803 2.6578 -820.83
## - horsepower 1 0.10127 2.7211 -816.01
## - cylinderNum 4 0.19570 2.8155 -815.01
## - carbody 4 0.34719 2.9670 -804.27
## - curbweight 1 0.61217 3.2320 -780.73
## - carManufacturer 21 2.92664 5.5465 -710.02
##
## Step: AIC=-823.39
## log(price) ~ horsepower + enginesize + curbweight + highwaympg +
## aspiration + carbody + cylinderNum + carManufacturer
##
## Df Sum of Sq RSS AIC
## - highwaympg 1 0.00642 2.6312 -824.89
## <none> 2.6248 -823.39
## - enginesize 1 0.04331 2.6681 -822.04
## - aspiration 1 0.08273 2.7075 -819.03
## - horsepower 1 0.10214 2.7269 -817.57
## - cylinderNum 4 0.21732 2.8421 -815.09
## - carbody 4 0.35725 2.9820 -805.23
## - curbweight 1 0.78807 3.4128 -771.57
## - carManufacturer 21 3.07072 5.6955 -706.58
##
## Step: AIC=-824.89
## log(price) ~ horsepower + enginesize + curbweight + aspiration +
## carbody + cylinderNum + carManufacturer
##
## Df Sum of Sq RSS AIC
## <none> 2.6312 -824.89
## - enginesize 1 0.04007 2.6713 -823.79
## - aspiration 1 0.07675 2.7079 -821.00
## - cylinderNum 4 0.23362 2.8648 -815.45
## - horsepower 1 0.17210 2.8033 -813.90
## - carbody 4 0.36112 2.9923 -806.53
## - curbweight 1 0.90130 3.5325 -766.51
## - carManufacturer 21 3.06450 5.6957 -708.58
##
## Call:
## lm(formula = log(price) ~ horsepower + enginesize + curbweight +
## aspiration + carbody + cylinderNum + carManufacturer, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.35131 -0.06436 0.00549 0.06560 0.41121
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.785e+00 1.636e-01 47.580 < 2e-16 ***
## horsepower 2.234e-03 6.681e-04 3.344 0.001013 **
## enginesize 1.447e-03 8.969e-04 1.614 0.108415
## curbweight 5.068e-04 6.622e-05 7.653 1.37e-12 ***
## aspirationturbo 7.176e-02 3.213e-02 2.233 0.026821 *
## carbodyhardtop -1.666e-01 7.531e-02 -2.213 0.028256 *
## carbodyhatchback -2.359e-01 6.357e-02 -3.710 0.000280 ***
## carbodysedan -1.730e-01 6.330e-02 -2.734 0.006922 **
## carbodywagon -2.583e-01 6.863e-02 -3.763 0.000230 ***
## cylinderNumfour 1.188e-01 8.188e-02 1.451 0.148706
## cylinderNumgeq_eight -6.697e-02 1.010e-01 -0.663 0.508069
## cylinderNumleq_three 3.681e-01 1.092e-01 3.370 0.000928 ***
## cylinderNumsix 1.245e-01 8.534e-02 1.458 0.146543
## carManufactureraudi 2.699e-01 1.142e-01 2.364 0.019184 *
## carManufacturerbmw 3.455e-01 9.617e-02 3.592 0.000428 ***
## carManufacturerbuick 2.512e-01 1.316e-01 1.909 0.058000 .
## carManufacturerchevrolet -2.239e-01 1.160e-01 -1.931 0.055195 .
## carManufacturerdodge -2.013e-01 9.465e-02 -2.127 0.034895 *
## carManufacturerhonda -9.350e-02 9.116e-02 -1.026 0.306487
## carManufacturerisuzu -7.672e-02 1.051e-01 -0.730 0.466217
## carManufacturerjaguar -1.273e-01 1.302e-01 -0.978 0.329502
## carManufacturermazda -4.852e-02 9.119e-02 -0.532 0.595326
## carManufacturermercury -9.729e-02 1.538e-01 -0.633 0.527732
## carManufacturermitsubishi -2.475e-01 9.162e-02 -2.702 0.007593 **
## carManufacturernissan -1.631e-01 8.862e-02 -1.841 0.067378 .
## carManufacturerpeugeot -1.536e-01 1.032e-01 -1.488 0.138565
## carManufacturerplymouth -2.281e-01 9.692e-02 -2.353 0.019755 *
## carManufacturerporsche 4.238e-01 1.026e-01 4.130 5.65e-05 ***
## carManufacturerrenault -1.577e-01 1.237e-01 -1.275 0.204105
## carManufacturersaab 4.221e-02 9.970e-02 0.423 0.672556
## carManufacturersubaru -1.925e-01 9.212e-02 -2.090 0.038133 *
## carManufacturertoyota -1.648e-01 8.551e-02 -1.927 0.055623 .
## carManufacturervolkswagen -6.172e-02 9.032e-02 -0.683 0.495316
## carManufacturervolvo 1.254e-02 9.489e-02 0.132 0.894990
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.124 on 171 degrees of freedom
## Multiple R-squared: 0.9492, Adjusted R-squared: 0.9394
## F-statistic: 96.79 on 33 and 171 DF, p-value: < 2.2e-16
## GVIF Df GVIF^(1/(2*Df))
## horsepower 9.254549 1 3.042129
## enginesize 18.493058 1 4.300356
## curbweight 15.763028 1 3.970268
## aspiration 2.034585 1 1.426389
## carbody 4.036524 4 1.190559
## cylinderNum 71.722557 4 1.705913
## carManufacturer 383.938806 21 1.152206
## Analysis of Variance Table
##
## Model 1: log(price) ~ horsepower + enginesize + curbweight + highwaympg +
## fueltype + aspiration + carbody + drivewheel + cylinderNum +
## carManufacturer
## Model 2: log(price) ~ horsepower + enginesize + curbweight + aspiration +
## carbody + cylinderNum + carManufacturer
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 167 2.6011
## 2 171 2.6312 -4 -0.030097 0.4831 0.7481
mdl.pa4 now shows a random, horizontal band of points.
The red line is flat. This indicates the variance of the residuals is
now constant, and we can trust our p-values and standard errors.GVIF^(1/(2*Df)) are below 5 -> no collinearity in
modelhighwaympg, fueltype, and
drivewheel as a group improve the model.mdl.pa4 (simpiler) is just as good as
mdl.pa3, so we should use mdl.pa4.
BE note: we know from data source that these prices are in dollars…
# Example properties of new car
# BE note: have to use exact text for factor vals
# Also a limitation of this (not built in) is that it doesn't check for limits
# of data (i.e., if we put in a horsepower over the max value the model won't
# be valid, but this isn't accounted for here...)
# this also doesn't account for types of engine produced by companies
new_car <- data.frame(
horsepower = 100,
enginesize = 120,
curbweight = 2500,
aspiration = as.factor("std"),
carbody = as.factor("sedan"),
cylinderNum = as.factor("four"),
carManufacturer = as.factor("toyota") # peugeot/volvo/porsche
)
# use predict() to estimate fit
pred_log_scale <-
predict(mdl.pa4, newdata = new_car, interval = "prediction")
print("Prediction on log($) scale:")## [1] "Prediction on log($) scale:"
## fit lwr upr
## 1 9.230509 8.979491 9.481526
# convert back from log price for fit, lwr, upr
pred_dollar_scale <- exp(pred_log_scale)
print("Prediction on dollar ($) scale:")## [1] "Prediction on dollar ($) scale:"
## fit lwr upr
## 1 10203.73 7938.589 13115.19
# All in one...
car_props <- paste(
new_car$carManufacturer, new_car$carbody,
"with", new_car$horsepower, "hp,",
new_car$enginesize, "enginesize, and",
new_car$curbweight, "lb curbweight"
)
fit_price <- pred_dollar_scale[1, "fit"]
lwr_price <- pred_dollar_scale[1, "lwr"]
upr_price <- pred_dollar_scale[1, "upr"]
report_string <- sprintf(
"\nBased on our final model, for a %s:
\n> The single best price estimate (yhat) is $%.0f.
\n> We can say we are 95%% confident that the listing price for a car with
these specific specifications would fall within prediction interval of
[$%.0f, $%.0f].",
car_props,
fit_price,
lwr_price,
upr_price
)
cat(report_string)##
## Based on our final model, for a toyota sedan with 100 hp, 120 enginesize, and 2500 lb curbweight:
##
## > The single best price estimate (yhat) is $10204.
##
## > We can say we are 95% confident that the listing price for a car with
## these specific specifications would fall within prediction interval of
## [$7939, $13115].
get_price_prediction_report <- function(model,
hp,
engine_size,
curb_weight,
asp,
body,
cyl,
mfr) {
new_car <- data.frame(
horsepower = hp,
enginesize = engine_size,
curbweight = curb_weight,
aspiration = as.factor(asp),
carbody = as.factor(body),
cylinderNum = as.factor(cyl),
carManufacturer = as.factor(mfr)
)
pred_log_scale <- predict(model, newdata = new_car, interval = "prediction")
pred_dollar_scale <- exp(pred_log_scale)
car_props <- paste(
new_car$carManufacturer, new_car$carbody,
"with", new_car$horsepower, "hp,",
new_car$enginesize, "enginesize, and",
new_car$curbweight, "lb curbweight"
)
fit_price <- pred_dollar_scale[1, "fit"]
lwr_price <- pred_dollar_scale[1, "lwr"]
upr_price <- pred_dollar_scale[1, "upr"]
report_string <- sprintf(
"\nBased on our final model, for a %s:
> The single best price estimate (y-hat) is $%.0f
> 95%% CI for this specific car is [$%.0f, $%.0f]",
car_props,
fit_price,
lwr_price,
upr_price
)
cat(report_string)
}# test 01: same toyota (to check :))
get_price_prediction_report(
model = mdl.pa4,
hp = 100,
engine_size = 120,
curb_weight = 2500,
asp = "std",
body = "sedan",
cyl = "four",
mfr = "toyota"
)##
## Based on our final model, for a toyota sedan with 100 hp, 120 enginesize, and 2500 lb curbweight:
## > The single best price estimate (y-hat) is $10204
## > 95% CI for this specific car is [$7939, $13115]
# test 02: porsche
get_price_prediction_report(
model = mdl.pa4,
hp = 200,
engine_size = 180,
curb_weight = 3000,
asp = "turbo",
body = "hardtop",
cyl = "six",
mfr = "porsche"
)##
## Based on our final model, for a porsche hardtop with 200 hp, 180 enginesize, and 3000 lb curbweight:
## > The single best price estimate (y-hat) is $35127
## > 95% CI for this specific car is [$26391, $46756]
# test 03: honda
get_price_prediction_report(
model = mdl.pa4,
hp = 150,
engine_size = 120,
curb_weight = 1000,
asp = "std",
body = "convertible",
cyl = "five",
mfr = "honda"
)##
## Based on our final model, for a honda convertible with 150 hp, 120 enginesize, and 1000 lb curbweight:
## > The single best price estimate (y-hat) is $6048
## > 95% CI for this specific car is [$4075, $8977]
Observation 76 is a high-leverage point
row_to_remove <- 76
df_fixed <- df[-row_to_remove, ]
mdl.pa5 <- lm(formula(mdl.pa4), data = df_fixed)
print(summary(mdl.pa4))##
## Call:
## lm(formula = log(price) ~ horsepower + enginesize + curbweight +
## aspiration + carbody + cylinderNum + carManufacturer, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.35131 -0.06436 0.00549 0.06560 0.41121
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.785e+00 1.636e-01 47.580 < 2e-16 ***
## horsepower 2.234e-03 6.681e-04 3.344 0.001013 **
## enginesize 1.447e-03 8.969e-04 1.614 0.108415
## curbweight 5.068e-04 6.622e-05 7.653 1.37e-12 ***
## aspirationturbo 7.176e-02 3.213e-02 2.233 0.026821 *
## carbodyhardtop -1.666e-01 7.531e-02 -2.213 0.028256 *
## carbodyhatchback -2.359e-01 6.357e-02 -3.710 0.000280 ***
## carbodysedan -1.730e-01 6.330e-02 -2.734 0.006922 **
## carbodywagon -2.583e-01 6.863e-02 -3.763 0.000230 ***
## cylinderNumfour 1.188e-01 8.188e-02 1.451 0.148706
## cylinderNumgeq_eight -6.697e-02 1.010e-01 -0.663 0.508069
## cylinderNumleq_three 3.681e-01 1.092e-01 3.370 0.000928 ***
## cylinderNumsix 1.245e-01 8.534e-02 1.458 0.146543
## carManufactureraudi 2.699e-01 1.142e-01 2.364 0.019184 *
## carManufacturerbmw 3.455e-01 9.617e-02 3.592 0.000428 ***
## carManufacturerbuick 2.512e-01 1.316e-01 1.909 0.058000 .
## carManufacturerchevrolet -2.239e-01 1.160e-01 -1.931 0.055195 .
## carManufacturerdodge -2.013e-01 9.465e-02 -2.127 0.034895 *
## carManufacturerhonda -9.350e-02 9.116e-02 -1.026 0.306487
## carManufacturerisuzu -7.672e-02 1.051e-01 -0.730 0.466217
## carManufacturerjaguar -1.273e-01 1.302e-01 -0.978 0.329502
## carManufacturermazda -4.852e-02 9.119e-02 -0.532 0.595326
## carManufacturermercury -9.729e-02 1.538e-01 -0.633 0.527732
## carManufacturermitsubishi -2.475e-01 9.162e-02 -2.702 0.007593 **
## carManufacturernissan -1.631e-01 8.862e-02 -1.841 0.067378 .
## carManufacturerpeugeot -1.536e-01 1.032e-01 -1.488 0.138565
## carManufacturerplymouth -2.281e-01 9.692e-02 -2.353 0.019755 *
## carManufacturerporsche 4.238e-01 1.026e-01 4.130 5.65e-05 ***
## carManufacturerrenault -1.577e-01 1.237e-01 -1.275 0.204105
## carManufacturersaab 4.221e-02 9.970e-02 0.423 0.672556
## carManufacturersubaru -1.925e-01 9.212e-02 -2.090 0.038133 *
## carManufacturertoyota -1.648e-01 8.551e-02 -1.927 0.055623 .
## carManufacturervolkswagen -6.172e-02 9.032e-02 -0.683 0.495316
## carManufacturervolvo 1.254e-02 9.489e-02 0.132 0.894990
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.124 on 171 degrees of freedom
## Multiple R-squared: 0.9492, Adjusted R-squared: 0.9394
## F-statistic: 96.79 on 33 and 171 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = formula(mdl.pa4), data = df_fixed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.35131 -0.06456 0.00564 0.06565 0.41121
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.785e+00 1.636e-01 47.580 < 2e-16 ***
## horsepower 2.234e-03 6.681e-04 3.344 0.001013 **
## enginesize 1.447e-03 8.969e-04 1.614 0.108415
## curbweight 5.068e-04 6.622e-05 7.653 1.37e-12 ***
## aspirationturbo 7.176e-02 3.213e-02 2.233 0.026821 *
## carbodyhardtop -1.666e-01 7.531e-02 -2.213 0.028256 *
## carbodyhatchback -2.359e-01 6.357e-02 -3.710 0.000280 ***
## carbodysedan -1.730e-01 6.330e-02 -2.734 0.006922 **
## carbodywagon -2.583e-01 6.863e-02 -3.763 0.000230 ***
## cylinderNumfour 1.188e-01 8.188e-02 1.451 0.148706
## cylinderNumgeq_eight -6.697e-02 1.010e-01 -0.663 0.508069
## cylinderNumleq_three 3.681e-01 1.092e-01 3.370 0.000928 ***
## cylinderNumsix 1.245e-01 8.534e-02 1.458 0.146543
## carManufactureraudi 2.699e-01 1.142e-01 2.364 0.019184 *
## carManufacturerbmw 3.455e-01 9.617e-02 3.592 0.000428 ***
## carManufacturerbuick 2.512e-01 1.316e-01 1.909 0.058000 .
## carManufacturerchevrolet -2.239e-01 1.160e-01 -1.931 0.055195 .
## carManufacturerdodge -2.013e-01 9.465e-02 -2.127 0.034895 *
## carManufacturerhonda -9.350e-02 9.116e-02 -1.026 0.306487
## carManufacturerisuzu -7.672e-02 1.051e-01 -0.730 0.466217
## carManufacturerjaguar -1.273e-01 1.302e-01 -0.978 0.329502
## carManufacturermazda -4.852e-02 9.119e-02 -0.532 0.595326
## carManufacturermitsubishi -2.475e-01 9.162e-02 -2.702 0.007593 **
## carManufacturernissan -1.631e-01 8.862e-02 -1.841 0.067378 .
## carManufacturerpeugeot -1.536e-01 1.032e-01 -1.488 0.138565
## carManufacturerplymouth -2.281e-01 9.692e-02 -2.353 0.019755 *
## carManufacturerporsche 4.238e-01 1.026e-01 4.130 5.65e-05 ***
## carManufacturerrenault -1.577e-01 1.237e-01 -1.275 0.204105
## carManufacturersaab 4.221e-02 9.970e-02 0.423 0.672556
## carManufacturersubaru -1.925e-01 9.212e-02 -2.090 0.038133 *
## carManufacturertoyota -1.648e-01 8.551e-02 -1.927 0.055623 .
## carManufacturervolkswagen -6.172e-02 9.032e-02 -0.683 0.495316
## carManufacturervolvo 1.254e-02 9.489e-02 0.132 0.894990
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.124 on 171 degrees of freedom
## Multiple R-squared: 0.9491, Adjusted R-squared: 0.9395
## F-statistic: 99.56 on 32 and 171 DF, p-value: < 2.2e-16
##
## --- VIF Scores for Robust Final Model ---
## GVIF Df GVIF^(1/(2*Df))
## horsepower 9.108072 1 3.017958
## enginesize 18.484053 1 4.299308
## curbweight 15.727048 1 3.965734
## aspiration 1.989300 1 1.410425
## carbody 3.998363 4 1.189146
## cylinderNum 71.620842 4 1.705611
## carManufacturer 359.645296 20 1.158502
Here we are asking: “How does fuel economy (citympg) affect a car’s price, after controlling for other characteristics?”
vars <- c(
"price", "safetyIncr", "aspiration", "carbody", "enginelocation",
"carwidth", "curbweight", "enginetype", "cylinderNum", "enginesize",
"stroke", "peakrpm", "compressionratio", "carManufacturer", "citympg"
)
dfm <- df[, vars]
dfm <- dfm[complete.cases(dfm), ]
dfm[] <- lapply(dfm, function(x) if (is.factor(x)) droplevels(x) else x)
# BE note: mpg1 is not included here, but have kept naming the same for
# consistency
lm.mpg2 <- lm(
price ~ safetyIncr + aspiration + carbody +
enginelocation + carwidth + curbweight + enginetype +
cylinderNum + enginesize + stroke + peakrpm + compressionratio +
carManufacturer + citympg,
data = dfm
)
print(summary(lm.mpg2))##
## Call:
## lm(formula = price ~ safetyIncr + aspiration + carbody + enginelocation +
## carwidth + curbweight + enginetype + cylinderNum + enginesize +
## stroke + peakrpm + compressionratio + carManufacturer + citympg,
## data = dfm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3625.7 -913.2 0.0 877.3 8457.7
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.859e+04 1.172e+04 -4.998 1.52e-06 ***
## safetyIncr1 5.175e+02 7.122e+02 0.727 0.468546
## safetyIncr2 1.436e+03 7.279e+02 1.973 0.050274 .
## safetyIncr3 1.408e+03 7.726e+02 1.823 0.070266 .
## safetyIncr4 9.524e+02 9.365e+02 1.017 0.310726
## aspirationturbo 2.132e+03 5.085e+02 4.193 4.57e-05 ***
## carbodyhardtop -2.688e+03 1.196e+03 -2.248 0.025978 *
## carbodyhatchback -2.997e+03 1.047e+03 -2.861 0.004790 **
## carbodysedan -2.915e+03 1.094e+03 -2.665 0.008502 **
## carbodywagon -3.776e+03 1.149e+03 -3.287 0.001247 **
## enginelocationrear 1.189e+04 3.126e+03 3.803 0.000204 ***
## carwidth 6.679e+02 1.947e+02 3.431 0.000767 ***
## curbweight 4.573e+00 1.350e+00 3.388 0.000888 ***
## enginetypedohcv -3.609e+03 3.558e+03 -1.014 0.312029
## enginetypel -4.170e+03 1.733e+03 -2.406 0.017263 *
## enginetypeohc 1.352e+03 1.032e+03 1.310 0.192061
## enginetypeohcf -3.332e+03 1.589e+03 -2.097 0.037608 *
## enginetypeohcv -3.126e+03 1.188e+03 -2.632 0.009325 **
## enginetyperotor -3.855e+03 3.371e+03 -1.144 0.254517
## cylinderNumfour 3.030e+03 1.373e+03 2.207 0.028763 *
## cylinderNumgeq_eight 6.613e+03 2.208e+03 2.996 0.003181 **
## cylinderNumleq_three 1.382e+04 3.516e+03 3.931 0.000126 ***
## cylinderNumsix 5.441e+03 1.586e+03 3.431 0.000768 ***
## enginesize 7.207e+01 1.670e+01 4.316 2.79e-05 ***
## stroke -1.416e+03 9.024e+02 -1.569 0.118617
## peakrpm 1.865e+00 5.866e-01 3.180 0.001773 **
## compressionratio -6.940e+01 6.173e+01 -1.124 0.262612
## carManufactureraudi 1.051e+03 2.144e+03 0.490 0.624764
## carManufacturerbmw 3.851e+03 1.955e+03 1.969 0.050654 .
## carManufacturerbuick 5.182e+03 2.130e+03 2.433 0.016103 *
## carManufacturerchevrolet -4.532e+03 2.100e+03 -2.157 0.032480 *
## carManufacturerdodge -4.590e+03 1.693e+03 -2.712 0.007438 **
## carManufacturerhonda -4.072e+03 1.787e+03 -2.278 0.024059 *
## carManufacturerisuzu -2.902e+03 1.791e+03 -1.620 0.107121
## carManufacturerjaguar -1.256e+02 2.199e+03 -0.057 0.954545
## carManufacturermazda -3.229e+03 1.545e+03 -2.090 0.038204 *
## carManufacturermercury -5.075e+03 2.488e+03 -2.039 0.043074 *
## carManufacturermitsubishi -5.207e+03 1.686e+03 -3.088 0.002383 **
## carManufacturernissan -3.816e+03 1.521e+03 -2.508 0.013141 *
## carManufacturerpeugeot NA NA NA NA
## carManufacturerplymouth -4.899e+03 1.724e+03 -2.841 0.005087 **
## carManufacturerporsche 2.776e+03 2.532e+03 1.096 0.274553
## carManufacturerrenault -5.121e+03 2.146e+03 -2.386 0.018230 *
## carManufacturersaab -1.211e+03 1.704e+03 -0.711 0.478210
## carManufacturersubaru NA NA NA NA
## carManufacturertoyota -4.033e+03 1.437e+03 -2.806 0.005652 **
## carManufacturervolkswagen -2.923e+03 1.682e+03 -1.738 0.084173 .
## carManufacturervolvo -3.055e+03 1.862e+03 -1.641 0.102835
## citympg 1.076e+02 6.651e+01 1.618 0.107634
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1816 on 158 degrees of freedom
## Multiple R-squared: 0.96, Adjusted R-squared: 0.9483
## F-statistic: 82.35 on 46 and 158 DF, p-value: < 2.2e-16
BE note: vif raises an error:
Error in vif.default(lm.mpg2): there are aliased coefficients in the model
This (& plots) indicate:
Model gives p-value of 0.107634, which suggests effect of
citympg is not significant, but errors stated above
indicate/suggest model is not valid
# use log(price)
lm.mpg2log <- lm(
log(price) ~ safetyIncr + aspiration + carbody +
enginelocation + carwidth + curbweight + enginetype +
cylinderNum + enginesize + stroke + peakrpm + compressionratio +
carManufacturer + citympg,
data = dfm
)
summary(lm.mpg2log)##
## Call:
## lm(formula = log(price) ~ safetyIncr + aspiration + carbody +
## enginelocation + carwidth + curbweight + enginetype + cylinderNum +
## enginesize + stroke + peakrpm + compressionratio + carManufacturer +
## citympg, data = dfm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.28965 -0.06365 0.00171 0.06179 0.43451
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.325e+00 7.684e-01 6.930 1.01e-10 ***
## safetyIncr1 -6.262e-02 4.669e-02 -1.341 0.181786
## safetyIncr2 -5.709e-02 4.771e-02 -1.197 0.233273
## safetyIncr3 -2.135e-02 5.064e-02 -0.422 0.673873
## safetyIncr4 1.902e-03 6.139e-02 0.031 0.975318
## aspirationturbo 1.202e-01 3.333e-02 3.605 0.000418 ***
## carbodyhardtop -1.210e-01 7.840e-02 -1.544 0.124627
## carbodyhatchback -1.864e-01 6.865e-02 -2.715 0.007362 **
## carbodysedan -1.423e-01 7.170e-02 -1.984 0.048980 *
## carbodywagon -2.277e-01 7.530e-02 -3.024 0.002912 **
## enginelocationrear 6.313e-01 2.049e-01 3.080 0.002440 **
## carwidth 4.114e-02 1.276e-02 3.224 0.001537 **
## curbweight 4.517e-04 8.847e-05 5.105 9.38e-07 ***
## enginetypedohcv -1.778e-01 2.333e-01 -0.762 0.447128
## enginetypel -3.669e-01 1.136e-01 -3.230 0.001507 **
## enginetypeohc -2.060e-02 6.765e-02 -0.304 0.761155
## enginetypeohcf -3.078e-01 1.042e-01 -2.954 0.003614 **
## enginetypeohcv -8.030e-02 7.784e-02 -1.032 0.303855
## enginetyperotor -4.276e-01 2.210e-01 -1.935 0.054761 .
## cylinderNumfour 1.181e-01 9.000e-02 1.312 0.191259
## cylinderNumgeq_eight 1.301e-01 1.447e-01 0.899 0.370010
## cylinderNumleq_three 7.171e-01 2.305e-01 3.111 0.002212 **
## cylinderNumsix 1.295e-01 1.040e-01 1.246 0.214712
## enginesize 1.932e-03 1.095e-03 1.765 0.079549 .
## stroke -3.219e-02 5.915e-02 -0.544 0.587038
## peakrpm 7.092e-05 3.846e-05 1.844 0.067016 .
## compressionratio 1.544e-03 4.046e-03 0.381 0.703370
## carManufactureraudi 7.558e-02 1.406e-01 0.538 0.591579
## carManufacturerbmw 2.871e-01 1.282e-01 2.240 0.026486 *
## carManufacturerbuick -3.798e-02 1.396e-01 -0.272 0.786005
## carManufacturerchevrolet -2.375e-01 1.377e-01 -1.725 0.086454 .
## carManufacturerdodge -2.841e-01 1.110e-01 -2.561 0.011388 *
## carManufacturerhonda -2.033e-01 1.172e-01 -1.735 0.084623 .
## carManufacturerisuzu -1.114e-01 1.174e-01 -0.949 0.344305
## carManufacturerjaguar -2.321e-01 1.442e-01 -1.610 0.109381
## carManufacturermazda -1.627e-01 1.013e-01 -1.607 0.110028
## carManufacturermercury -1.597e-01 1.631e-01 -0.979 0.328936
## carManufacturermitsubishi -3.505e-01 1.105e-01 -3.170 0.001828 **
## carManufacturernissan -2.060e-01 9.972e-02 -2.066 0.040505 *
## carManufacturerpeugeot NA NA NA NA
## carManufacturerplymouth -3.109e-01 1.130e-01 -2.750 0.006647 **
## carManufacturerporsche 1.796e-01 1.660e-01 1.082 0.280897
## carManufacturerrenault -2.921e-01 1.407e-01 -2.076 0.039537 *
## carManufacturersaab -8.446e-02 1.117e-01 -0.756 0.450590
## carManufacturersubaru NA NA NA NA
## carManufacturertoyota -2.357e-01 9.422e-02 -2.501 0.013398 *
## carManufacturervolkswagen -1.781e-01 1.102e-01 -1.616 0.108162
## carManufacturervolvo -1.712e-01 1.220e-01 -1.403 0.162601
## citympg -4.080e-03 4.360e-03 -0.936 0.350801
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1191 on 158 degrees of freedom
## Multiple R-squared: 0.9567, Adjusted R-squared: 0.9441
## F-statistic: 75.96 on 46 and 158 DF, p-value: < 2.2e-16
lm.mpg2logaic <- stepAIC(lm.mpg2log, direction = "backward", trace = FALSE)
print(summary(lm.mpg2logaic))##
## Call:
## lm(formula = log(price) ~ aspiration + carbody + carwidth + curbweight +
## enginetype + enginesize + peakrpm + carManufacturer + citympg,
## data = dfm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.28430 -0.06153 0.00325 0.06208 0.44761
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.388e+00 7.113e-01 7.575 2.33e-12 ***
## aspirationturbo 1.194e-01 2.940e-02 4.060 7.53e-05 ***
## carbodyhardtop -1.748e-01 7.132e-02 -2.451 0.015277 *
## carbodyhatchback -2.149e-01 6.319e-02 -3.400 0.000842 ***
## carbodysedan -1.650e-01 6.231e-02 -2.648 0.008870 **
## carbodywagon -2.402e-01 6.640e-02 -3.618 0.000393 ***
## carwidth 4.182e-02 1.181e-02 3.542 0.000516 ***
## curbweight 4.432e-04 7.213e-05 6.144 5.71e-09 ***
## enginetypedohcv -2.518e-01 1.764e-01 -1.428 0.155191
## enginetypel 1.857e-01 1.613e-01 1.151 0.251343
## enginetypeohc -4.827e-02 5.443e-02 -0.887 0.376485
## enginetypeohcf 3.230e-01 1.581e-01 2.043 0.042610 *
## enginetypeohcv -6.121e-02 6.296e-02 -0.972 0.332350
## enginetyperotor 2.113e-01 9.566e-02 2.209 0.028509 *
## enginesize 2.087e-03 8.213e-04 2.541 0.011970 *
## peakrpm 5.520e-05 3.476e-05 1.588 0.114183
## carManufactureraudi -2.622e-02 1.142e-01 -0.230 0.818649
## carManufacturerbmw 3.169e-01 1.103e-01 2.872 0.004612 **
## carManufacturerbuick -8.363e-02 1.117e-01 -0.749 0.454916
## carManufacturerchevrolet -2.064e-01 1.292e-01 -1.597 0.112112
## carManufacturerdodge -2.694e-01 1.024e-01 -2.630 0.009325 **
## carManufacturerhonda -1.795e-01 1.051e-01 -1.709 0.089367 .
## carManufacturerisuzu -8.925e-02 1.101e-01 -0.810 0.418946
## carManufacturerjaguar -2.546e-01 1.355e-01 -1.878 0.062098 .
## carManufacturermazda -1.488e-01 9.609e-02 -1.548 0.123488
## carManufacturermercury -1.556e-01 1.552e-01 -1.002 0.317595
## carManufacturermitsubishi -3.298e-01 1.005e-01 -3.281 0.001261 **
## carManufacturernissan -2.030e-01 9.345e-02 -2.172 0.031255 *
## carManufacturerpeugeot -5.441e-01 1.891e-01 -2.878 0.004532 **
## carManufacturerplymouth -2.935e-01 1.046e-01 -2.805 0.005621 **
## carManufacturerporsche 2.475e-01 1.591e-01 1.556 0.121594
## carManufacturerrenault -3.048e-01 1.275e-01 -2.390 0.017981 *
## carManufacturersaab -5.038e-02 1.014e-01 -0.497 0.619957
## carManufacturersubaru -6.205e-01 1.849e-01 -3.356 0.000978 ***
## carManufacturertoyota -2.127e-01 8.774e-02 -2.424 0.016412 *
## carManufacturervolkswagen -1.725e-01 9.773e-02 -1.765 0.079457 .
## carManufacturervolvo -1.111e-01 1.077e-01 -1.032 0.303519
## citympg -4.162e-03 3.027e-03 -1.375 0.171028
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1184 on 167 degrees of freedom
## Multiple R-squared: 0.9548, Adjusted R-squared: 0.9448
## F-statistic: 95.36 on 37 and 167 DF, p-value: < 2.2e-16
## GVIF Df GVIF^(1/(2*Df))
## aspiration 1.870544e+00 1 1.367678
## carbody 4.724866e+00 4 1.214224
## carwidth 9.343089e+00 1 3.056647
## curbweight 2.053810e+01 1 4.531898
## enginetype 7.457299e+03 6 2.102399
## enginesize 1.702941e+01 1 4.126670
## peakrpm 4.001969e+00 1 2.000492
## carManufacturer 1.198908e+05 21 1.321063
## citympg 5.710770e+00 1 2.389722
## Analysis of Variance Table
##
## Model 1: log(price) ~ aspiration + carbody + carwidth + curbweight + enginetype +
## enginesize + peakrpm + carManufacturer + citympg
## Model 2: log(price) ~ safetyIncr + aspiration + carbody + enginelocation +
## carwidth + curbweight + enginetype + cylinderNum + enginesize +
## stroke + peakrpm + compressionratio + carManufacturer + citympg
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 167 2.34
## 2 158 2.24 9 0.099951 0.7833 0.632
BE notes:
Linking back to original question: “How does fuel economy (citympg) affect a car’s price, after controlling for other characteristics?”
Linking back to original question: “How does fuel economy (citympg) affect a car’s price, after controlling for other characteristics?”
Coefficients
| Term | Estimate | Std. Error | t value | Pr(> t ) |
|---|---|---|---|---|
| citympg | -4.162e-03 | 3.027e-03 | -1.375 | 0.171028 |
Answer:
So citympg does not have a statistically significant
effect on log(price) after controlling for other factors (like
curbweight, enginesize, carwidth, and carManufacturer)
Answer:
So citympg does not have a statistically significant
effect on log(price) after controlling for other factors (like
curbweight, enginesize, carwidth, and carManufacturer)
Note here Ardi’s great model looks to address the question: ‘What car features predict a car’s fuel efficiency value?’
BE note: uses log(highwaympg/price) which represents MPG
per dollar (or fuel efficiency value)
lm.mpg3 <- lm(
formula = log(highwaympg / price) ~ carbody + enginetype +
carwidth + curbweight + peakrpm + horsepower + carManufacturer,
data = df
)
summary(lm.mpg3)##
## Call:
## lm(formula = log(highwaympg/price) ~ carbody + enginetype + carwidth +
## curbweight + peakrpm + horsepower + carManufacturer, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.34331 -0.08570 0.00625 0.08940 0.31795
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.737e+00 8.655e-01 -2.007 0.046319 *
## carbodyhardtop 2.046e-01 8.749e-02 2.339 0.020501 *
## carbodyhatchback 2.633e-01 7.682e-02 3.428 0.000764 ***
## carbodysedan 2.191e-01 7.577e-02 2.892 0.004330 **
## carbodywagon 2.809e-01 8.026e-02 3.500 0.000594 ***
## enginetypedohcv 1.069e+00 2.271e-01 4.706 5.24e-06 ***
## enginetypel -1.263e-01 1.978e-01 -0.638 0.524136
## enginetypeohc -7.980e-03 6.583e-02 -0.121 0.903655
## enginetypeohcf -9.385e-02 1.956e-01 -0.480 0.632014
## enginetypeohcv 2.543e-02 7.151e-02 0.356 0.722621
## enginetyperotor -3.877e-01 1.024e-01 -3.785 0.000213 ***
## carwidth -2.666e-02 1.422e-02 -1.876 0.062432 .
## curbweight -7.130e-04 7.675e-05 -9.290 < 2e-16 ***
## peakrpm -7.619e-05 3.864e-05 -1.972 0.050277 .
## horsepower -5.866e-03 8.125e-04 -7.219 1.69e-11 ***
## carManufactureraudi -1.322e-01 1.400e-01 -0.944 0.346274
## carManufacturerbmw -3.048e-01 1.291e-01 -2.362 0.019328 *
## carManufacturerbuick -1.186e-01 1.342e-01 -0.884 0.378130
## carManufacturerchevrolet 3.625e-01 1.517e-01 2.390 0.017955 *
## carManufacturerdodge 2.315e-01 1.219e-01 1.899 0.059286 .
## carManufacturerhonda 1.909e-01 1.238e-01 1.543 0.124789
## carManufacturerisuzu 1.151e-01 1.319e-01 0.873 0.383904
## carManufacturerjaguar 2.716e-01 1.444e-01 1.882 0.061615 .
## carManufacturermazda 1.028e-01 1.168e-01 0.880 0.380104
## carManufacturermercury 2.441e-01 1.921e-01 1.271 0.205472
## carManufacturermitsubishi 2.865e-01 1.199e-01 2.389 0.017989 *
## carManufacturernissan 2.316e-01 1.117e-01 2.074 0.039618 *
## carManufacturerpeugeot 3.178e-01 2.355e-01 1.350 0.178909
## carManufacturerplymouth 2.727e-01 1.246e-01 2.189 0.029974 *
## carManufacturerporsche -1.621e-01 1.935e-01 -0.838 0.403178
## carManufacturerrenault 2.260e-01 1.547e-01 1.461 0.145794
## carManufacturersaab 7.343e-02 1.245e-01 0.590 0.556121
## carManufacturersubaru 2.231e-01 2.294e-01 0.973 0.332175
## carManufacturertoyota 2.229e-01 1.061e-01 2.101 0.037135 *
## carManufacturervolkswagen 1.620e-01 1.181e-01 1.371 0.172220
## carManufacturervolvo 9.926e-02 1.297e-01 0.765 0.445099
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1457 on 169 degrees of freedom
## Multiple R-squared: 0.9639, Adjusted R-squared: 0.9564
## F-statistic: 129 on 35 and 169 DF, p-value: < 2.2e-16
## GVIF Df GVIF^(1/(2*Df))
## carbody 4.260997 4 1.198640
## enginetype 4671.186671 6 2.022021
## carwidth 8.943383 1 2.990549
## curbweight 15.354936 1 3.918537
## peakrpm 3.266314 1 1.807295
## horsepower 9.926502 1 3.150635
## carManufacturer 50156.872654 21 1.293936
BE note: Is the fact that we have the message “not plotting observations with leverage one: 19, 76, 126, 130” a problem?
## Start: AIC=-757.45
## log(highwaympg/price) ~ carbody + enginetype + carwidth + curbweight +
## peakrpm + horsepower + carManufacturer
##
## Df Sum of Sq RSS AIC
## <none> 3.5855 -757.45
## - carwidth 1 0.07464 3.6601 -755.23
## - peakrpm 1 0.08248 3.6679 -754.79
## - carbody 4 0.32781 3.9133 -747.52
## - enginetype 6 1.16168 4.7471 -711.92
## - horsepower 1 1.10571 4.6912 -704.35
## - carManufacturer 21 2.81832 6.4038 -680.55
## - curbweight 1 1.83111 5.4166 -674.88
##
## Call:
## lm(formula = log(highwaympg/price) ~ carbody + enginetype + carwidth +
## curbweight + peakrpm + horsepower + carManufacturer, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.34331 -0.08570 0.00625 0.08940 0.31795
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.737e+00 8.655e-01 -2.007 0.046319 *
## carbodyhardtop 2.046e-01 8.749e-02 2.339 0.020501 *
## carbodyhatchback 2.633e-01 7.682e-02 3.428 0.000764 ***
## carbodysedan 2.191e-01 7.577e-02 2.892 0.004330 **
## carbodywagon 2.809e-01 8.026e-02 3.500 0.000594 ***
## enginetypedohcv 1.069e+00 2.271e-01 4.706 5.24e-06 ***
## enginetypel -1.263e-01 1.978e-01 -0.638 0.524136
## enginetypeohc -7.980e-03 6.583e-02 -0.121 0.903655
## enginetypeohcf -9.385e-02 1.956e-01 -0.480 0.632014
## enginetypeohcv 2.543e-02 7.151e-02 0.356 0.722621
## enginetyperotor -3.877e-01 1.024e-01 -3.785 0.000213 ***
## carwidth -2.666e-02 1.422e-02 -1.876 0.062432 .
## curbweight -7.130e-04 7.675e-05 -9.290 < 2e-16 ***
## peakrpm -7.619e-05 3.864e-05 -1.972 0.050277 .
## horsepower -5.866e-03 8.125e-04 -7.219 1.69e-11 ***
## carManufactureraudi -1.322e-01 1.400e-01 -0.944 0.346274
## carManufacturerbmw -3.048e-01 1.291e-01 -2.362 0.019328 *
## carManufacturerbuick -1.186e-01 1.342e-01 -0.884 0.378130
## carManufacturerchevrolet 3.625e-01 1.517e-01 2.390 0.017955 *
## carManufacturerdodge 2.315e-01 1.219e-01 1.899 0.059286 .
## carManufacturerhonda 1.909e-01 1.238e-01 1.543 0.124789
## carManufacturerisuzu 1.151e-01 1.319e-01 0.873 0.383904
## carManufacturerjaguar 2.716e-01 1.444e-01 1.882 0.061615 .
## carManufacturermazda 1.028e-01 1.168e-01 0.880 0.380104
## carManufacturermercury 2.441e-01 1.921e-01 1.271 0.205472
## carManufacturermitsubishi 2.865e-01 1.199e-01 2.389 0.017989 *
## carManufacturernissan 2.316e-01 1.117e-01 2.074 0.039618 *
## carManufacturerpeugeot 3.178e-01 2.355e-01 1.350 0.178909
## carManufacturerplymouth 2.727e-01 1.246e-01 2.189 0.029974 *
## carManufacturerporsche -1.621e-01 1.935e-01 -0.838 0.403178
## carManufacturerrenault 2.260e-01 1.547e-01 1.461 0.145794
## carManufacturersaab 7.343e-02 1.245e-01 0.590 0.556121
## carManufacturersubaru 2.231e-01 2.294e-01 0.973 0.332175
## carManufacturertoyota 2.229e-01 1.061e-01 2.101 0.037135 *
## carManufacturervolkswagen 1.620e-01 1.181e-01 1.371 0.172220
## carManufacturervolvo 9.926e-02 1.297e-01 0.765 0.445099
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1457 on 169 degrees of freedom
## Multiple R-squared: 0.9639, Adjusted R-squared: 0.9564
## F-statistic: 129 on 35 and 169 DF, p-value: < 2.2e-16
## GVIF Df GVIF^(1/(2*Df))
## carbody 4.260997 4 1.198640
## enginetype 4671.186671 6 2.022021
## carwidth 8.943383 1 2.990549
## curbweight 15.354936 1 3.918537
## peakrpm 3.266314 1 1.807295
## horsepower 9.926502 1 3.150635
## carManufacturer 50156.872654 21 1.293936
Summary
Not sure if we want to mention it’s flawed due to leverage points and then tell story.
Story is quite clear though:
curbweight and horsepower are the
strongest predictors with significant negative coefficients -> makes
sense:
Rotor engine has strong negative coefficient -> indicating it has a poor fuel efficiency value -> consistent with prior knowledge
TBI
TBI